691595264a59c8cf4f6b716376fd8a5cf374e119,src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java,ProtobufAnnotationSerializer,fromProto,#CoreNLPProtos.Document#,691
Before Change
if (!tokens.isEmpty() && sentence.hasTokenOffsetBegin() && sentence.hasTokenOffsetEnd()) {
// Set tokens for sentence
int tokenBegin = Math.min(sentence.getTokenOffsetBegin(), tokens.size());
int tokenEnd = Math.min(sentence.getTokenOffsetEnd(), tokens.size());
map.set(TokensAnnotation.class, tokens.subList(tokenBegin, tokenEnd));
// Set sentence index + token index + paragraph index
for (int i = tokenBegin; i < tokenEnd; ++i) {
After Change
map.get(TokensAnnotation.class) == null) {
// Set tokens for sentence
int tokenBegin = sentence.getTokenOffsetBegin();
int tokenEnd = sentence.getTokenOffsetEnd();
assert tokenBegin <= tokens.size() && tokenBegin <= tokenEnd;
assert tokenEnd <= tokens.size();
map.set(TokensAnnotation.class, tokens.subList(tokenBegin, tokenEnd));
// Set sentence index + token index + paragraph index
for (int i = tokenBegin; i < tokenEnd; ++i) {
tokens.get(i).setSentIndex(sentIndex);
tokens.get(i).setIndex(i - sentence.getTokenOffsetBegin() + 1);
if (sentence.hasParagraph()) { tokens.get(i).set(ParagraphAnnotation.class, sentence.getParagraph()); }
}
// Set text
int characterBegin = sentence.getCharacterOffsetBegin();
int characterEnd = sentence.getCharacterOffsetEnd();
if (characterEnd <= proto.getText().length()) {
// The usual case -- get the text from the document text
map.set(TextAnnotation.class, proto.getText().substring(characterBegin, characterEnd));
} else {
// The document text is wrong -- guess the text from the tokens
map.set(TextAnnotation.class, recoverOriginalText(tokens.subList(tokenBegin, tokenEnd)));
}
}
// End iteration